################################################################################
### Replication code for:
### "Disguised Repression: Targeting Opponents with Non-Political Crimes 
### to Undermine Dissent"
### Jennifer Pan, Xu Xu, Yiqing Xu
################################################################################

## -----------------------------------------------------------------------------
## -----Setup-------------------------------------------------------------------
## -----------------------------------------------------------------------------

# Install only the packages that are not already available, so that re-running
# the script does not re-download and re-install them every time.
required_pkgs <- c("dplyr", "lubridate", "ggplot2", "scales")
missing_pkgs <- required_pkgs[
  !vapply(required_pkgs, requireNamespace, logical(1), quietly = TRUE)
]
if (length(missing_pkgs) > 0) {
  install.packages(missing_pkgs)
}

# Start from an empty workspace (this also removes the two helper vectors above)
rm(list = ls())

## Please set working directory to the "Replication" folder
# for example:
# setwd("~/Dropbox/ProjectY/Repression/Replication/")


library(dplyr)
library(lubridate)
library(ggplot2)
library(scales)


## -----------------------------------------------------------------------------
## -----Figure 4. Weibo Posts Citing Dissidents' Names by Crime Type------------
## -----------------------------------------------------------------------------

# Post-level Weibo data; the code below uses its `date1` and `polct` columns.
weibo_counts <- readRDS("./data/weibo_counts.rds")

# Total counts of Weibo posts about KOLs by day and crime-type group.
# `.groups = "drop"` returns an ungrouped tibble and silences the regrouping
# message that summarize() prints after a multi-variable group_by().
byday <- weibo_counts %>%
  group_by(day = floor_date(date1, "day"), polct) %>%
  summarize(amount = n(), .groups = "drop")

sum(byday$amount)    # total number of posts
length(byday$amount) # number of day-by-group cells

byday_total <- weibo_counts %>%
  group_by(date2 = floor_date(as.Date(date1), "day"), polct) %>%
  summarize(total = n(), .groups = "drop")

# Match totals to counts on the (date, group) key instead of dividing the two
# tables row by row, so the result cannot be silently misaligned.
# Both columns are row counts over the same cells, so `rate` is 1 whenever
# `date1` is already a Date; it is kept so `byday` has the same columns as before.
byday <- byday %>%
  left_join(byday_total, by = c("day" = "date2", "polct" = "polct")) %>%
  mutate(rate = amount / total) %>%
  select(-total)

# Total counts of Weibo posts about KOLs by month and crime-type group
bymonth <- weibo_counts %>%
  group_by(month = floor_date(date1, "month"), polct) %>%
  summarize(amount = n(), .groups = "drop")

sum(bymonth$amount)    # total number of posts
length(bymonth$amount) # number of month-by-group cells

bymonth_total <- weibo_counts %>%
  group_by(date2 = floor_date(as.Date(date1), "month"), polct) %>%
  summarize(total = n(), .groups = "drop")

# Same key-based alignment as for the daily table
bymonth <- bymonth %>%
  left_join(bymonth_total, by = c("month" = "date2", "polct" = "polct")) %>%
  mutate(rate = amount / total) %>%
  select(-total)

## -----Figure 4a: Plotting total posts by groups by month----------------------

# Figure 4a: monthly number of Weibo posts for each crime-type group (`polct`),
# with shaded bands for the three months before and after 2013-08-01 and a
# dashed vertical line at that date. Saved to ./graphs/fg_ttlposts.pdf.
freq_month_release <- ggplot(
  data = bymonth,
  aes(x = month, y = amount, group = polct, color = polct)
) +
  # Layers (drawing order matters: bands first, then points, lines, marker)
  geom_rect(
    aes(
      xmin = as.Date("2013-05-01"), xmax = as.Date("2013-08-01"),
      ymin = -Inf, ymax = Inf
    ),
    fill = "grey95", alpha = 0.1, color = NA
  ) +
  geom_rect(
    aes(
      xmin = as.Date("2013-08-01"), xmax = as.Date("2013-11-01"),
      ymin = -Inf, ymax = Inf
    ),
    fill = "grey85", alpha = 0.05, color = NA
  ) +
  geom_point(aes(shape = polct, color = polct), size = 2) +
  geom_line() +
  geom_vline(
    xintercept = as.numeric(as.Date("2013-08-01")),
    linetype = "dashed", lwd = 0.3
  ) +
  annotate(
    geom = "text", x = as.Date("2013-09-15"), y = 650000,
    label = "2013 Crackdown", size = 5, angle = 90
  ) +
  # Scales
  scale_x_date(
    breaks = "3 month", date_labels = "%Y%b",
    limits = as.Date(c("2010-03-01", "2014-03-01"))
  ) +
  scale_y_continuous(labels = comma) +
  scale_color_manual(
    name = "Crimes", labels = c("Non-Political", "Political"),
    values = c("gray60", "gray30")
  ) +
  scale_shape_manual(
    name = "Crimes", labels = c("Non-Political", "Political"),
    values = c(17, 19)
  ) +
  # Labels and theme
  labs(title = "", y = "Number of Weibo Posts", x = "") +
  theme_bw() +
  theme(
    legend.position = c(0.26, 0.9),
    legend.direction = "horizontal",
    legend.text = element_text(size = 13),
    legend.title = element_text(size = 14),
    axis.text = element_text(size = 15),
    axis.text.x = element_text(angle = 90, hjust = 1, vjust = 0.5),
    axis.title = element_text(size = 15),
    axis.title.x = element_text(size = 20),
    strip.text.x = element_text(size = 240),
    plot.title = element_text(size = 25)
  )
freq_month_release
ggsave("./graphs/fg_ttlposts.pdf", freq_month_release, width = 8, height = 6)


# Axis-label helper for a log10-transformed axis: each break `x` is a base-10
# exponent, which is converted back to the raw value 10^x and formatted with
# thousands separators by scales::comma().
log_label <- function(x) {
  raw_value <- 10^x
  scales::comma(raw_value)
}

## -----Figure 4b: Plotting logged posts by groups by day-----------------------

# Figure 4b: daily number of Weibo posts per crime-type group on a log10 scale,
# restricted to 2013-05-01 through 2013-11-01. The y axis is plotted as
# log10(amount) and relabelled with raw counts by log_label().
# Saved to ./graphs/fg_ttlpostsday.pdf.
freq_day_release <- ggplot(
  data = byday,
  aes(x = day, y = log10(amount), group = polct, color = polct)
) +
  # Layers (drawing order matters: bands first, then points, lines, marker)
  geom_rect(
    aes(
      xmin = as.Date("2013-05-01"), xmax = as.Date("2013-08-01"),
      ymin = 0, ymax = 6
    ),
    fill = "grey95", alpha = 0.1, color = NA
  ) +
  geom_rect(
    aes(
      xmin = as.Date("2013-08-01"), xmax = as.Date("2013-11-01"),
      ymin = 0, ymax = 6
    ),
    fill = "grey85", alpha = 0.05, color = NA
  ) +
  geom_point(aes(shape = polct, color = polct), size = 2) +
  geom_line() +
  geom_vline(
    xintercept = as.numeric(as.Date("2013-08-01")),
    linetype = "dashed", lwd = 0.3
  ) +
  annotate(
    geom = "text", x = as.Date("2013-08-05"), y = 1.2,
    label = "Crackdown Starts", size = 5, angle = 90, colour = "gray30"
  ) +
  # Scales
  scale_x_date(
    breaks = "14 day", date_labels = "%b%d",
    limits = as.Date(c("2013-05-01", "2013-11-01"))
  ) +
  scale_y_continuous(labels = log_label, limits = c(0, 6)) +
  scale_color_manual(
    name = "Crimes", labels = c("Non-Political", "Political"),
    values = c("gray60", "gray30")
  ) +
  scale_shape_manual(
    name = "Crimes", labels = c("Non-Political", "Political"),
    values = c(17, 19)
  ) +
  # Labels and theme
  labs(title = "", y = "Number of Weibo Posts (Log Scale)", x = "") +
  theme_bw() +
  theme(
    legend.position = c(0.26, 0.9),
    legend.direction = "horizontal",
    legend.text = element_text(size = 13),
    legend.title = element_text(size = 14),
    legend.background = element_blank(),
    axis.text = element_text(size = 15),
    axis.text.x = element_text(angle = 90, hjust = 1, vjust = 0.5),
    axis.title = element_text(size = 15),
    axis.title.x = element_text(size = 20),
    strip.text.x = element_text(size = 240),
    plot.title = element_text(size = 25)
  )
freq_day_release
ggsave("./graphs/fg_ttlpostsday.pdf", freq_day_release, width = 8, height = 6)


## -----------------------------------------------------------------------------
## -----Figure 5. Calls for Critics’ Release------------------------------------
## -----------------------------------------------------------------------------

# Post-level Weibo data; the code below uses its `date1`, `polct` and
# `releasettln` columns.
weibo_counts <- readRDS("./data/weibo_counts.rds")

# Count dissent posts by month and group (sum of `releasettln`).
# `.groups = "drop"` returns an ungrouped tibble and silences the regrouping
# message that summarize() prints after a multi-variable group_by().
bymonth <- weibo_counts %>%
  group_by(month = floor_date(date1, "month"), polct) %>%
  summarize(amount = sum(releasettln), .groups = "drop")

# Count total posts by month and group
bymonth_total <- weibo_counts %>%
  group_by(date2 = floor_date(as.Date(date1), "month"), polct) %>%
  summarize(total = n(), .groups = "drop")

# Match totals to dissent counts on the (month, group) key instead of dividing
# the two tables row by row, so the rate cannot be silently misaligned.
bymonth <- bymonth %>%
  left_join(bymonth_total, by = c("month" = "date2", "polct" = "polct")) %>%
  mutate(
    rate = amount / total,       # dissent rate
    lnamount = log(amount + 1)   # log of (dissent count + 1), not a rate
  ) %>%
  select(-total)

# Figure 5: monthly dissent rate (`rate`) for each crime-type group, with a
# dashed vertical line at 2013-08-01. Saved to ./graphs/fg_rlsprop.pdf.
prop_month_release <- ggplot(
  data = bymonth,
  aes(x = month, y = rate, group = polct, color = polct)
) +
  # Layers
  geom_point(aes(shape = polct, color = polct), size = 1.5) +
  geom_line() +
  geom_vline(
    xintercept = as.numeric(as.Date("2013-08-01")),
    linetype = "dashed", lwd = 0.3
  ) +
  annotate(
    geom = "text", x = as.Date("2013-06-01"), y = 0.03,
    label = "2013 Crackdown", size = 6, angle = 90
  ) +
  # Scales: proportions are shown as percentages
  scale_x_date(
    breaks = "3 month", date_labels = "%Y%m",
    limits = as.Date(c("2010-03-01", "2014-03-01"))
  ) +
  scale_y_continuous(
    labels = function(x) paste0(x * 100, "%"),
    breaks = seq(0, 0.1, 0.02)
  ) +
  scale_color_manual(
    name = "Crimes", labels = c("Non-Political", "Political"),
    values = c("gray60", "gray30")
  ) +
  scale_shape_manual(
    name = "Crimes", labels = c("Non-Political", "Political"),
    values = c(17, 19)
  ) +
  # Labels and theme
  labs(title = "", y = "Proportion of Dissenting Weibo Posts", x = "") +
  theme_bw() +
  theme(
    legend.position = c(0.25, 0.9),
    legend.direction = "horizontal",
    legend.text = element_text(size = 11),
    legend.title = element_text(size = 12),
    axis.text = element_text(size = 15),
    axis.text.x = element_text(angle = 90, hjust = 1),
    axis.title = element_text(size = 15),
    axis.title.x = element_text(size = 20),
    strip.text.x = element_text(size = 240),
    plot.title = element_text(size = 25)
  )
prop_month_release
ggsave("./graphs/fg_rlsprop.pdf", prop_month_release, width = 10, height = 7)


## -----------------------------------------------------------------------------
## -----Figure 6. Online dissent for individual critics-------------------------
## -----------------------------------------------------------------------------

# Per-critic summary; the plot reads its row names and the `group`,
# `proportion`, `dissent` and `mentions` columns.
weibo_dissent <- readRDS("./data/weibo_dissent.rds")

# Figure 6: one bar per critic (row name), ordered by decreasing proportion and
# filled by group; each bar is labelled "dissent/mentions".
# Saved to ./graphs/fg_pctpostsbw.pdf.
pct_dissident_release <- ggplot(
  weibo_dissent,
  aes(
    x = reorder(row.names(weibo_dissent), -proportion),
    y = proportion,
    fill = group
  )
) +
  geom_bar(position = "dodge", stat = "identity") +
  geom_text(
    aes(label = paste0(dissent, "/", mentions)),
    color = "black", size = 3,
    position = position_dodge(width = 1),
    angle = 90, hjust = -0.3
  ) +
  scale_fill_manual(values = c("gray60", "gray30")) +
  scale_y_continuous(
    labels = function(x) paste0(x * 100, "%"),
    breaks = seq(0, 0.11, 0.02)
  ) +
  # Zoom (rather than clip) so the labels above the bars stay visible
  coord_cartesian(ylim = c(0, 0.115)) +
  labs(
    title = "",
    y = "Proportion of Dissenting Weibo Posts",
    x = "",
    fill = "Crimes"
  ) +
  theme_bw() +
  theme(
    legend.position = "right",
    axis.text = element_text(size = 12),
    axis.text.x = element_text(angle = 90, hjust = 1, vjust = 0.35),
    axis.title = element_text(size = 12),
    axis.title.x = element_text(size = 20),
    strip.text.x = element_text(size = 240),
    plot.title = element_text(size = 25)
  )
pct_dissident_release
ggsave("./graphs/fg_pctpostsbw.pdf", pct_dissident_release, width = 10, height = 7)


## -----------------------------------------------------------------------------
## -----Figure 7. Stance toward critics-----------------------------------------
## -----------------------------------------------------------------------------

## -----Figure 7a: Non-political crimes-----------------------------------------

# Stance summary for critics charged with non-political crimes; the plot reads
# its `target`, `mean`, `ci_l` and `ci_h` columns.
stance_moral <- readRDS("./data/weibo_stance_morality.rds")

# Label each row with its period.
# NOTE(review): this positional assignment assumes the rows are stored as
# (before, after) pairs within each `target` -- confirm against the .rds file.
stance_moral$condition <- c("morarrestbf", "morarrest", "morarrestbf", "morarrest")
stance_moral$condition <- factor(stance_moral$condition,
  levels = c("morarrestbf", "morarrest"),
  labels = c("Before Arrest\n(Non-Political Crime)", "After Arrest\n(Non-Political Crime)"))

# Line plot of the mean proportion before vs. after arrest, one line per
# `target`, with confidence-interval error bars. The lines are labelled with
# text annotations, so all legends are switched off.
plot <- ggplot(stance_moral, aes(x = condition, y = mean, group = target)) +
  geom_line(aes(color = target, linetype = target), size = 1, show.legend = FALSE) +
  geom_point(aes(color = target), show.legend = FALSE) +
  geom_errorbar(aes(ymin = ci_l, ymax = ci_h), width = 0.1, color = "black") +
  labs(title = "",
       x = "",
       y = "Proportion of Posts") +
  annotate(geom = "text", x = 2.2, y = 0.51, label = "Against", size = 5, color = "black") +
  annotate(geom = "text", x = 2.15, y = 0.275, label = "Pro", size = 5, color = "black") +
  ylim(0.1, 0.65) +
  theme_minimal() +
  guides(color = guide_legend(title = NULL)) +
  theme(axis.text.x = element_text(size = 14, color = "black"),
        axis.title.y = element_text(size = 14),
        panel.border = element_rect(color = "black", fill = NA, size = 1)) +
  # guide = "none" replaces the deprecated guide = FALSE
  scale_color_manual(values = c("black", "black"), guide = "none") +
  scale_linetype_manual(values = c("dashed", "solid"), guide = "none")
plot
ggsave("./graphs/fg_stance_moral.pdf", plot, height = 6, width = 6)

## -----Figure 7b: Political crimes---------------------------------------------

# Stance summary for critics charged with political crimes; the plot reads its
# `target`, `mean`, `ci_l` and `ci_h` columns.
stance_political <- readRDS("./data/weibo_stance_political.rds")

# Label each row with its period.
# NOTE(review): this positional assignment assumes the rows are stored as
# (before, after) pairs within each `target` -- confirm against the .rds file.
stance_political$condition <- c("polarrestbf", "polarrest", "polarrestbf", "polarrest")
stance_political$condition <- factor(stance_political$condition,
  levels = c("polarrestbf", "polarrest"),
  labels = c("Before Arrest\n(Political Crime)", "After Arrest\n(Political Crime)"))

# Line plot of the mean proportion before vs. after arrest, one line per
# `target`, with confidence-interval error bars. The lines are labelled with
# text annotations, so all legends are switched off.
plot <- ggplot(stance_political, aes(x = condition, y = mean, color = target, group = target)) +
  geom_line(aes(color = target, linetype = target), size = 1, show.legend = FALSE) +
  geom_point(aes(color = target), show.legend = FALSE) +
  geom_errorbar(aes(ymin = ci_l, ymax = ci_h), width = 0.1, color = "black") +
  labs(title = "",
       x = "",
       y = "Proportion of Posts") +
  annotate(geom = "text", x = 2.15, y = 0.58, label = "Pro", size = 5, color = "black") +
  annotate(geom = "text", x = 2.2, y = 0.19, label = "Against", size = 5, color = "black") +
  ylim(0.1, 0.65) +
  theme_minimal() +
  guides(color = guide_legend(title = NULL)) +
  theme(axis.text.x = element_text(size = 14, color = "black"),
        axis.title.y = element_text(size = 14),
        panel.border = element_rect(color = "black", fill = NA, size = 1)) +
  # guide = "none" replaces the deprecated guide = FALSE
  scale_color_manual(values = c("black", "black"), guide = "none") +
  scale_linetype_manual(values = c("dashed", "solid"), guide = "none")
plot
ggsave("./graphs/fg_stance_political.pdf", plot, height = 6, width = 6)


## -----------------------------------------------------------------------------
## -----Figure A11. Overall Sentiment of Weibo Posts----------------------------
## -----------------------------------------------------------------------------

# Sentiment summary; the plot reads its `condition`, `target`, `mean`, `ci_l`
# and `ci_h` columns.
snownlp <- readRDS("./data/weibo_snownlp.rds")

# Fix the x-axis order: "Before Arrest" first, then "After Arrest"
snownlp$condition <- factor(snownlp$condition,
                            levels = c("Before Arrest", "After Arrest"))

# Mean sentiment before vs. after arrest, one line per `target`, with
# confidence-interval error bars. The lines are labelled with text annotations,
# so the colour legend is switched off.
plot <- ggplot(snownlp, aes(x = condition, y = mean, group = target, color = target)) +
  geom_line(aes(color = target, linetype = target), size = 1, show.legend = FALSE) +
  geom_point(aes(color = target), show.legend = FALSE) +
  geom_errorbar(aes(ymin = ci_l, ymax = ci_h), width = 0.2, position = position_dodge(0)) +
  labs(title = "",
       x = "",
       y = "More Negative < Mean Sentiment > More Positive") +
  annotate(geom = "text", x = 2.1, y = 0.823, label = "Political\nCrime", size = 5, color = "black", hjust = 0) +
  annotate(geom = "text", x = 2.1, y = 0.803, label = "Non-Political\nCrime", size = 5, color = "black", hjust = 0) +
  ylim(0.79, 0.92) +
  theme_minimal() +
  # guide = "none" replaces the deprecated (and reassignable) guide = F
  scale_color_manual("Legend", values = c("gray30", "gray60"), guide = "none") +
  theme(axis.text.y = element_text(size = 14),
        axis.text.x = element_text(size = 14, color = "black"),
        axis.title.y = element_text(size = 14, color = "black"),
        panel.border = element_rect(color = "black", fill = NA, size = 1))
plot
ggsave("./graphs/fg_snownlp.pdf", plot, height = 6, width = 6)


## -----------------------------------------------------------------------------
## -----Figure A12. Word frequency regarding critics pre- and post-arrest-------
## -----------------------------------------------------------------------------

# Loads the text objects used in this section (`arxmzdata`, `arxmzbfdata`,
# `arwgqdata` and `arwgqbfdata` are read below).
load("./data/xuewangtext.RData")

## -----Figure A12a: Xue Manzi Odds---------------------------------------------

#Getting data for Xue Manzi (after the arrest)

# Collapse all posts into one string, then split it into space-separated tokens
arxmzstring <- paste(arxmzdata, collapse = " ")
arxmzList <- lapply(arxmzstring, strsplit, " ")

xmztxtChar <- unlist(arxmzList)

#Cleaning the data
# (head() is applied to the flat token vector only; calling it on the nested
# one-element list above would print every token)
head(xmztxtChar)
xmztxtChar <- gsub("\\.|,|\\!|:|;|\\?|-|﹏|﹉|　| ","",xmztxtChar) #clean symbol(.,!:;?-)
xmztxtChar <- gsub("0|1|2|3|4|5|6|7|8|9","",xmztxtChar) #clean numbers
xmztxtChar <- gsub("a|b|c|d|e|f|g|h|i|j|k|l|m|n|o|p|q|r|s|t|u|v|w|x|y|z","",xmztxtChar) #clean eng character
xmztxtChar <- gsub("A|B|C|D|E|F|G|H|I|J|K|L|M|N|O|P|Q|R|S|T|U|V|W|X|Y|Z","",xmztxtChar) #clean ENG character
xmztxtChar <- gsub("转发|微博|转","",xmztxtChar) #clean repost words

# Drop tokens that became empty after cleaning
xmztxtChar <- xmztxtChar[xmztxtChar != ""]

#Calculating frequencies
xmzfreq <- as.data.frame(table(xmztxtChar))
colnames(xmzfreq) <- c("Word", "freq")
# Number of distinct tokens (the table has no `prop` column at this point, so
# the earlier length(xmzfreq$prop) always returned 0)
nrow(xmzfreq)

xmzordfreq <- xmzfreq[order(xmzfreq$freq, decreasing = TRUE), ]
# Keep ranks 2-1000: skip the single most frequent token
# (NOTE(review): presumably the critic's own name -- confirm).
# head() returns the same rows as [2:1000, ] when at least 1000 tokens exist,
# but does not pad with all-NA rows when there are fewer.
xmzordfreq <- head(xmzordfreq[-1, ], 999)

#Getting data for Xue Manzi (before the arrest)
# Collapse all posts into one string, then split it into space-separated tokens
arxmzbfstring <- paste(arxmzbfdata, collapse = " ")
arxmzbfList <- lapply(arxmzbfstring, strsplit, " ")

xmzbftxtChar <- unlist(arxmzbfList)

#Cleaning the data
# (head() is applied to the flat token vector only; calling it on the nested
# one-element list above would print every token)
head(xmzbftxtChar)
xmzbftxtChar <- gsub("\\.|,|\\!|:|;|\\?|-|﹏|﹉|　| ","",xmzbftxtChar) #clean symbol(.,!:;?-)
xmzbftxtChar <- gsub("0|1|2|3|4|5|6|7|8|9","",xmzbftxtChar) #clean number
xmzbftxtChar <- gsub("a|b|c|d|e|f|g|h|i|j|k|l|m|n|o|p|q|r|s|t|u|v|w|x|y|z","",xmzbftxtChar) #clean eng character
xmzbftxtChar <- gsub("A|B|C|D|E|F|G|H|I|J|K|L|M|N|O|P|Q|R|S|T|U|V|W|X|Y|Z","",xmzbftxtChar) #clean ENG character
xmzbftxtChar <- gsub("转发|微博|转","",xmzbftxtChar) #clean repost words

# Drop tokens that became empty after cleaning
xmzbftxtChar <- xmzbftxtChar[xmzbftxtChar != ""]

#Calculating frequencies
xmzbffreq <- as.data.frame(table(xmzbftxtChar))
colnames(xmzbffreq) <- c("Word", "bffreq")

xmzbfordfreq <- xmzbffreq[order(xmzbffreq$bffreq, decreasing = TRUE), ]
# Keep ranks 2-1000: skip the single most frequent token
# (NOTE(review): presumably the critic's own name -- confirm).
# head() returns the same rows as [2:1000, ] when at least 1000 tokens exist,
# but does not pad with all-NA rows when there are fewer.
xmzbfordfreq <- head(xmzbfordfreq[-1, ], 999)

# Keep only the words present in both the pre- and post-arrest top lists.
# (An earlier merge of the full frequency tables was overwritten immediately
# by this one and has been removed as dead code.)
xmzmerge <- merge(xmzbfordfreq, xmzordfreq, by = "Word")

# Share of each word within the merged vocabulary, before and after the arrest
xmzmerge <- xmzmerge %>% mutate(
  bfprop = bffreq / sum(bffreq),
  prop = freq / sum(freq)
)

# Sanity check: both sets of proportions should sum to 1
sum(xmzmerge$bfprop)
sum(xmzmerge$prop)

# Order by pre-arrest frequency, most frequent first
xmzmergeorder <- xmzmerge[order(xmzmerge$bffreq, decreasing = TRUE), ]

#Calculating log odds ratio
# log odds of the word after the arrest minus log odds before the arrest
xmzmergeorder$logodds <- log(xmzmergeorder$prop / (1 - xmzmergeorder$prop)) -
  log(xmzmergeorder$bfprop / (1 - xmzmergeorder$bfprop))
# Sum of the two log frequencies (log of their product)
xmzmergeorder$logfreq <- log(xmzmergeorder$bffreq) + log(xmzmergeorder$freq)

#Adding translations
# English glosses for the merged words, assigned by position: element i labels
# row i of `xmzmergeorder` (sorted by pre-arrest frequency, descending).
# NOTE(review): the glosses are tied to the exact row order produced from the
# replication data; any change to the data or the cleaning steps above would
# shift the rows and mislabel the words -- confirm before modifying either.
xmzmergeorder$engword <- c("China", "Li Kaifu", "Ren Zhiqiang", "Children", "Lawyer", "Support", "Society", "Agree", "Beijing", "Anger", 
                           "Whoring", "No", "Pan Shiyi", "Hope", "History", "Recommend", "Investment", "Xu Xin", "Attention", "Yuan Yulai", "Think", 
                           "People", "Know", "Weak", "Government", "USA", "Eat", "Too", "Video", "Headline News", "Matter", "Problem", "News", "Tears", 
                           "Environmental Protection", "Microphone", "Praise", "Country", "Heart", "Li", "Li Chengpeng", "Citizens", "Love", "True", 
                           "Micro", "Need", "Study", "Zhao Xiao", "He Bing", "Era", "Wang Shi", "Education", "Citizen", "Time", "Reform", "Years Old", 
                           "Official", "Journalist", "Jian Rong", "Occur", "Correct", "Bull", "Think", "CCTV", "Mr.", "New", "Article", "Talk", "Life", 
                           "Media", "Life", "Author", "Public Welfare", "Reply", "World", "Success", "System", "Special Supply", "Walk", "Arrest", "Out", 
                           "Like", "Report", "Yuan", "Money", "Candle", "Important", "Energy", "Old", "Protection", "Old Man", "Law", "Become", "Teacher", 
                           "Internet", "Rob", "Professor", "10,000", "Miles", "Shocked", "Indeed", "Real", "Economy", "Death", "Cultural Revolution", "Development", 
                           "Leadership", "Laugh", "Individual", "Effort", "Do", "Read", "Ask", "Front", "Serious", "Sweat", "Shanghai", "People's Daily", 
                           "Responsibility", "Truth", "Question", "Company", "Inspiring", "Corrupt Official", "Suggestion", "Daily", "Network", "Rise Up", 
                           "Send", "Genuine", "Official", "Friend", "Dream", "Really", "Environment", "Whether", "Power", "Force", "Thought", "Haha", 
                           "Mighty", "Become", "Politics", "Snicker", "Zhang Lifan", "Help", "Ability", "Share", "First", "Report", "Dong Liangjie", "Story", 
                           "Yuan", "Zhang", "Child", "Ding Laifeng", "Block", "Buy", "Dare Not", "First", "Apologize", "Comment", "Morality", "Opportunity", 
                           "Male", "Event", "Fast", "Vigorously", "Become", "Female", "Point", "Seems", "Way", "Sina", "Onlookers", "Life", "Suspected", "How", 
                           "Appeal", "Largest", "Constitutionalism", "Representative", "Fair", "Find", "Think", "Sexual Relationship", "Police", "Judge", 
                           "Believe", "Speak", "Angel", "Actually", "System", "Masses", "Unable", "Basic", "Indeed", "Work", "Public", "Sell", "Bring", 
                           "Common People", "Solve", "Have to", "Choose", "Mobile Phone", "Young Girl", "Suddenly", "Safe", "Obtain", "Body", "Listen", 
                           "Dig", "Expert", "Nationwide", "Wang", "Currently", "Spirit", "Originally", "Yesterday", "Son", "Qin Huohuo", "Quite a Few", 
                           "Tell", "Don't Know", "Booger", "Rescue", "Police", "Barbarian", "Freedom", "Discover", "Absolutely", "Compassion", "Democracy", 
                           "Rape", "Thank", "Decline", "Scary", "Help", "Chef", "Currently", "Bottom Line", "Girl", "Black", "Safe", "Investigation", 
                           "Chen", "Fact", "See", "Is it", "Only Can", "Blog", "Speak", "World", "People's Network", "Good", "Pitiful", "Can", "Less Than", 
                           "Identity", "Personnel", "Client", "Several", "Message", "Announce", "Understand", "Write", "Rumor", "Old Man", "Tall", 
                           "Long", "Voice", "Everyday", "Happiness", "Applaud", "IQ", "Demand", "Thing", "Shao", "Mainland", "Gao", "So-Called", "Photo", 
                           "Yuan Li", "Tomorrow", "Mayor", "Principal", "Crime", "That Year", "Woman", "Gang", "Cancer", "Domestic", "Strength", "Net", 
                           "Wrong", "God Bless", "Solicit Prostitute", "Tianchao", "One", "North Korea", "Relationship", "Free", "Xue", "Flower", "Netizen", 
                           "Not Possible", "Phoenix", "Thoroughly", "Ying", "This One", "Prostitution", "Fa", "Understand", "Behavior", "At Least", 
                           "Prostitution", "Million", "Reason", "Release", "Capture", "Past", "Billion", "Dare", "Rest Assured", "Celebrity", "Man", 
                           "Increasingly", "Finance", "Dizzy", "Information", "Cheng", "More", "Name", "Situation", "Happy", "Especially", "Justice", 
                           "One Side", "Contact", "Always", "Vice", "Value", "Completely", "Hahaha", "Space", "He Weifang", "Ma Ying-jeou", "Ma", 
                           "Gold", "Standard", "Exercise Book", "Right", "Investor", "Joke", "Direct", "Participate", "Complete", "Interest", "Big Shot", 
                           "Beijing Police", "Suspect", "Answer", "Shameless", "Why", "Understand", "Easy", "Chinese", "Normal", "Prove", "Include", 
                           "Not", "Mode", "Yang", "Rumor", "Run", "This Matter", "Surely", "Name", "Online", "Famous", "Supervision", "People", "One", 
                           "Top", "Number", "Cloud", "Organization", "Act", "Heaven", "Influence", "One Day", "Exist", "Nationality", "Silly", "On", 
                           "Reason", "Behind", "City Newspaper", "Department", "Despise", "Feel", "Alive", "Fan", "People's Republic of China", 
                           "Criticize", "Obvious", "Administrative Detention", "Wang Ying", "Public Intellectual", "Oppose", "Reality", "Two", 
                           "Official", "Early", "Bad", "Socialism", "Liu", "Hero", "Party", "Common Sense", "West", "Public", "Confirm", "Anti-Kidnapping", 
                           "Today", "Station", "Estimate", "Receive", "Not Seen", "Night", "Three", "Understand", "Beili", "Tianyi", "Pay Attention", 
                           "Transmit", "Explain", "Heard", "Character", "Charity", "Rule of Law", "Cute", "Accept", "Anhui", "Community", "Careful", "Abroad", "Simple", 
                           "Young", "Admit Guilt", "See", "Action", "Front", "Taxpayer", "Not Good", "Opinion", "Participate", "Anyway", "Inside", "Time", "Special", 
                           "Smash", "Huge", "Son", "Believe", "Control", "Reason", "Corruption", "Before", "Two", "Play", "Have", "Logic", "Feature", "Scold", "Henanese"
)

# Figure A12a: each word is drawn as text at (log odds, combined log
# frequency); text size grows with 1.5^logfreq and the shade marks whether the
# log odds are positive. Saved to ./graphs/fg_xmzodds.pdf.
xmz_odds_ratio <- ggplot(xmzmergeorder, aes(x = logodds, y = logfreq)) +
  geom_text(
    aes(label = engword, size = 1.5^logfreq, color = logodds > 0),
    angle = 0
  ) +
  # Scales: no size legend, fixed x range, two grey shades for the sign
  scale_size(range = c(0, 10), guide = "none") +
  xlim(-2.3, 4.1) +
  scale_color_manual(
    name = "",
    labels = c("Log Odds < 0", "Log Odds > 0"),
    values = c("gray60", "gray20")
  ) +
  # Labels and theme
  labs(
    title = "",
    y = "Combined Log Word Frequency",
    x = "Log Odds of Word in Post- vs. Pre-arrest Weibo Posts"
  ) +
  theme_bw() +
  theme(
    legend.position = "bottom",
    axis.text = element_text(size = 12),
    axis.title = element_text(size = 12),
    axis.title.x = element_text(size = 12),
    strip.text.x = element_text(size = 240),
    plot.title = element_text(size = 25)
  )
xmz_odds_ratio
ggsave("./graphs/fg_xmzodds.pdf", xmz_odds_ratio, width = 10, height = 7)


## -----Figure A12b: Wang Gongquan Odds-----------------------------------------

#Getting data for Wang Gongquan (after the arrest)
# Collapse all posts into one string, then split it into space-separated tokens.
# (The earlier head() call on the nested one-element list was removed: it
# printed every token rather than a short preview.)
arwgqstring <- paste(arwgqdata, collapse = " ")
arwgqList <- lapply(arwgqstring, strsplit, " ")

wgqtxtChar <- unlist(arwgqList)

#Cleaning the data
wgqtxtChar <- gsub("\\.|,|\\!|:|;|\\?|-|﹏|﹉|　| ","",wgqtxtChar) #clean symbol(.,!:;?-)
wgqtxtChar <- gsub("0|1|2|3|4|5|6|7|8|9","",wgqtxtChar) #clean number
wgqtxtChar <- gsub("a|b|c|d|e|f|g|h|i|j|k|l|m|n|o|p|q|r|s|t|u|v|w|x|y|z","",wgqtxtChar) #clean eng character
wgqtxtChar <- gsub("A|B|C|D|E|F|G|H|I|J|K|L|M|N|O|P|Q|R|S|T|U|V|W|X|Y|Z","",wgqtxtChar) #clean ENG character
wgqtxtChar <- gsub("转发|微博|转","",wgqtxtChar) #clean repost words

# Drop tokens that became empty after cleaning
wgqtxtChar <- wgqtxtChar[wgqtxtChar != ""]

#Calculating frequencies
wgqfreq <- as.data.frame(table(wgqtxtChar))
colnames(wgqfreq) <- c("Word", "freq")

wgqordfreq <- wgqfreq[order(wgqfreq$freq, decreasing = TRUE), ]
# Keep ranks 2-1000: skip the single most frequent token
# (NOTE(review): presumably the critic's own name -- confirm).
# head() returns the same rows as [2:1000, ] when at least 1000 tokens exist,
# but does not pad with all-NA rows when there are fewer.
wgqordfreq <- head(wgqordfreq[-1, ], 999)


#Getting data for Wang Gongquan (before the arrest)
# Collapse all posts into one string, then split it into space-separated tokens
arwgqbfstring <- paste(arwgqbfdata, collapse = " ")
arwgqbfList <- lapply(arwgqbfstring, strsplit, " ")

wgqbftxtChar <- unlist(arwgqbfList)

#Cleaning the data
wgqbftxtChar <- gsub("\\.|,|\\!|:|;|\\?|-|﹏|﹉|　| ","",wgqbftxtChar) #clean symbol(.,!:;?-)
wgqbftxtChar <- gsub("0|1|2|3|4|5|6|7|8|9","",wgqbftxtChar) #clean number
wgqbftxtChar <- gsub("a|b|c|d|e|f|g|h|i|j|k|l|m|n|o|p|q|r|s|t|u|v|w|x|y|z","",wgqbftxtChar) #clean eng character
wgqbftxtChar <- gsub("A|B|C|D|E|F|G|H|I|J|K|L|M|N|O|P|Q|R|S|T|U|V|W|X|Y|Z","",wgqbftxtChar) #clean ENG character
wgqbftxtChar <- gsub("转发|微博|转","",wgqbftxtChar) #clean repost words

# Drop tokens that became empty after cleaning
wgqbftxtChar <- wgqbftxtChar[wgqbftxtChar != ""]

#Calculating frequencies
wgqbffreq <- as.data.frame(table(wgqbftxtChar))
colnames(wgqbffreq) <- c("Word", "bffreq")

wgqbfordfreq <- wgqbffreq[order(wgqbffreq$bffreq, decreasing = TRUE), ]
# Keep ranks 2-1000: skip the single most frequent token
# (NOTE(review): presumably the critic's own name -- confirm).
# head() returns the same rows as [2:1000, ] when at least 1000 tokens exist,
# but does not pad with all-NA rows when there are fewer.
wgqbfordfreq <- head(wgqbfordfreq[-1, ], 999)


# Keep only the words present in both the pre- and post-arrest top lists.
# (An earlier merge of the full frequency tables was overwritten immediately
# by this one and has been removed as dead code.)
wgqmerge <- merge(wgqbfordfreq, wgqordfreq, by = "Word")

# Share of each word within the merged vocabulary, before and after the arrest
wgqmerge <- wgqmerge %>% mutate(
  bfprop = bffreq / sum(bffreq),
  prop = freq / sum(freq)
)

# Order by pre-arrest frequency, most frequent first
wgqmergeorder <- wgqmerge[order(wgqmerge$bffreq, decreasing = TRUE), ]

#Calculating log odds ratio
# log odds of the word after the arrest minus log odds before the arrest
wgqmergeorder$logodds <- log(wgqmergeorder$prop / (1 - wgqmergeorder$prop)) -
  log(wgqmergeorder$bfprop / (1 - wgqmergeorder$bfprop))
# Sum of the two log frequencies (log of their product)
wgqmergeorder$logfreq <- log(wgqmergeorder$bffreq) + log(wgqmergeorder$freq)

# Print the ordered words as one quoted, comma-separated string
paste0('"', wgqmergeorder$Word, '"', collapse = ", ")

#Adding translations
# English glosses for the merged words, assigned by position: element i labels
# row i of `wgqmergeorder` (sorted by pre-arrest frequency, descending).
# NOTE(review): the glosses are tied to the exact row order produced from the
# replication data; any change to the data or the cleaning steps above would
# shift the rows and mislabel the words -- confirm before modifying either.
wgqmergeorder$engword <- c("Entrepreneur", "Success", "Start-Up", "Internet", "Industry", "China", "Elope", 
                           "Suggestion", "Traditional", "Era", "Share", "Direction", "Become", "Magazine", "Future", 
                           "Many Years", "Searching", "Help", "Zuo Zhijian", "Microphone", "Enterprise", 
                           "Investment", "Representative", "Love", "Lose", "Japan", "Strength", "Liu Suli", 
                           "Problem", "Business", "Official", "Power", "Right", "Reporter", "Ren Zhiqiang", 
                           "Economy", "Well-Known", "Wang Shi", "Politics", "Public", "Pan Shiyi", "Do", 
                           "Society", "Express", "Currently", "Government", "Best", "Micro", "Defend", "Bo", 
                           "Point", "Only Can", "Person", "Friend", "Matter", "Country", "Know", "Recognize", 
                           "Wantong", "Praise", "Not", "Law", "In Business Speak Business", "Xue Manzi", "Interview", 
                           "Poet", "Phoenix Network", "Beijing", "Seems", "Wang", "Want", "Liu Chuanzhi", "See", 
                           "One", "Send", "Sit", "Telephone", "Can", "Hope", "Sentimental", "World", "Money", 
                           "Character", "Thief", "Fubao", "Award", "Businessman", "Important", "Deactivate Account", 
                           "Time", "Right", "Feng Lun", "Bear", "Hibernation", "Before", "Merit", "News", "Mr.", 
                           "Citizen", "Man", "Provide", "Investor", "Thought", "Too", "Always", "Netizen", "Scope", 
                           "Daughter", "Face", "First", "Zhao Xiao", "Gentleman", "Situation", "Give Up", "Walk", 
                           "Expect", "Understand", "Reason", "Turn Into", "Listen", "Truth", "Net", "Go", "America", 
                           "Individual", "Need", "Phoenix", "Sina", "Organization", "Speak", "Ask", "Die", "True", 
                           "Really", "Pressure", "Ideal", "Identity", "So-Called", "Strive For", "Onlook", 
                           "Wang Ying", "Finance", "Public Intellectual", "Tell", "In Reality", "Conspire", 
                           "That Year", "Lawyer", "Station", "Once", "People's Congress Representative", "Child", 
                           "Love", "Think", "Go Out", "Participate in Politics", "Life", "Progress", "Publish", 
                           "Reply", "Li Kaifu", "Freedom", "Talk", "Willing", "Ruling Party", "Matter", "Collude", 
                           "Privilege", "Wang Wei", "Duty", "Business", "He Bing", "Again", "Colleague", "Magazine", 
                           "Join", "Whether", "Right", "Choose", "Ye Kuangzheng", "Perform", "Sacrifice", "Bribery", 
                           "Ten Thousand", "Client", "West", "Avoid", "Blog Post", "Inside", "Chen You", "Zhang Lifan", 
                           "Public", "Attention", "Immigrate", "Chinese Edition", "Event", "Message", "Teacher", 
                           "Power and Right", "Think", "Thank", "Recommend", "Not For", "Effort", "Like", 
                           "Business World", "Truth", "Leave", "Explain", "Revolution", "Founder", "Home", "Refuse", 
                           "Could It Be", "Still", "Cry", "Wish", "Rule", "Department", "Life", "Woman", "Article", 
                           "Laugh", "Change", "Technology", "On", "Make", "Safety", "Should", "Indeed", "Call", 
                           "Carry", "Two", "Support", "View", "Publish", "Prostitution", "Wish", "Shoot", "New", 
                           "Future", "Fear", "Change", "True", "Blessing", "Old Wang", "Shout", "Do", "Venture Capital", 
                           "Originally", "Media", "Promote", "Reform", "Believe", "Not Know", "Liu", "Participate", 
                           "Admit", "Li", "First", "NetEase", "Comment", "Chi Susheng", "Theme", "Write", "Several", 
                           "Story", "Opportunity", "Speech", "Read", "Perhaps", "Seem", "Public", "Public Welfare", 
                           "Liu Shengjun", "CCTV", "Realize", "Year", "Cause", "Old", "Property", "Road", "Relationship", 
                           "Persist", "Establish", "Catch", "Storm", "Subway", "Divorce", "Awesome", "At Least", 
                           "Two Years", "Brother", "Touched", "Surely", "Wind", "Go Public", "Secret Laugh", "Hehe", 
                           "Cheng Jie", "Democracy", "Silence", "Xiao Shu", "Dinner", "One", "One", "People", "Estimate", 
                           "Blog", "Majority", "Depth", "Action", "Get Up", "Human World", "Pass", "Consensus", 
                           "Participate", "Surprised", "Common Sense", "Become", "Property", "Accept", "Rule of Law", 
                           "South", "Collude", "Voice", "Mostly", "Fear", "Meaning")

# Figure A12b: each word is drawn as text at (log odds, combined log
# frequency); text size grows with 1.5^logfreq and the shade marks whether the
# log odds are positive. Saved to ./graphs/fg_wgqodds.pdf.
wgq_odds_ratio <- ggplot(wgqmergeorder, aes(x = logodds, y = logfreq)) +
  geom_text(
    aes(label = engword, size = 1.5^logfreq, color = logodds > 0),
    angle = 0
  ) +
  # Scales: no size legend, fixed x range, two grey shades for the sign
  scale_size(range = c(0, 10), guide = "none") +
  xlim(-2.3, 4.1) +
  scale_color_manual(
    name = "",
    labels = c("Log Odds < 0", "Log Odds > 0"),
    values = c("gray60", "gray20")
  ) +
  # Labels and theme
  labs(
    title = "",
    y = "Combined Log Word Frequency",
    x = "Log Odds of Word in Post- vs. Pre-arrest Weibo Posts"
  ) +
  theme_bw() +
  theme(
    legend.position = "bottom",
    axis.text = element_text(size = 12),
    axis.title = element_text(size = 12),
    axis.title.x = element_text(size = 12),
    strip.text.x = element_text(size = 240),
    plot.title = element_text(size = 25)
  )
wgq_odds_ratio
ggsave("./graphs/fg_wgqodds.pdf", wgq_odds_ratio, width = 10, height = 7)

